Analyse der Matomo-HaNS-Daten

Author

Sebastian Sauer

Published

May 23, 2025

1 Setup

1.1 R-Pakete starten

Show the code
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
Show the code
theme_set(theme_minimal())

1.2 Roh-Daten laden und inspizieren (data_all_fct)

JSON-Daten wurden nicht importiert, da offenbar nur redundante Daten enthalten sind.

Show the code
tar_load(data_all_fct)

1.2.1 Dimension

Der Roh-Datensatz verfügt über

  • 160 Zeilen
  • 2801 Spalten (Dubletten und Spalten mit Bildern bereits entfernt)

Jede Zeile entspricht einem “Visit”.

1.2.2 Erster Blick

Show the code
# Keep a small preview of the raw data: first 100 columns, first 100 rows.
data_all_fct_head100 <- data_all_fct |>
  select(1:100) |>
  slice_head(n = 100)
Show the code
# Overview plot of column types and missing values in the preview.
visdat::vis_dat(data_all_fct_head100)

1.2.3 Namen (1-100)

Show the code
# Column names of the preview.
names(data_all_fct_head100)
  [1] "file_id"                          "idvisit"                         
  [3] "visitip"                          "visitorid"                       
  [5] "fingerprint"                      "actiondetails_0_type"            
  [7] "actiondetails_0_url"              "actiondetails_0_pageidaction"    
  [9] "actiondetails_0_idpageview"       "actiondetails_0_servertimepretty"
 [11] "actiondetails_0_pageid"           "actiondetails_0_eventcategory"   
 [13] "actiondetails_0_eventaction"      "actiondetails_0_timestamp"       
 [15] "actiondetails_0_title"            "actiondetails_0_subtitle"        
 [17] "actiondetails_1_type"             "actiondetails_1_url"             
 [19] "actiondetails_1_pageidaction"     "actiondetails_1_idpageview"      
 [21] "actiondetails_1_servertimepretty" "actiondetails_1_pageid"          
 [23] "actiondetails_1_timespent"        "actiondetails_1_timespentpretty" 
 [25] "actiondetails_1_pageviewposition" "actiondetails_1_title"           
 [27] "actiondetails_1_subtitle"         "actiondetails_1_timestamp"       
 [29] "actiondetails_2_type"             "actiondetails_2_url"             
 [31] "actiondetails_2_pageidaction"     "actiondetails_2_idpageview"      
 [33] "actiondetails_2_servertimepretty" "actiondetails_2_pageid"          
 [35] "actiondetails_2_eventcategory"    "actiondetails_2_eventaction"     
 [37] "actiondetails_2_pageviewposition" "actiondetails_2_timestamp"       
 [39] "actiondetails_2_title"            "actiondetails_2_subtitle"        
 [41] "actiondetails_3_type"             "actiondetails_3_url"             
 [43] "actiondetails_3_pageidaction"     "actiondetails_3_idpageview"      
 [45] "actiondetails_3_servertimepretty" "actiondetails_3_pageid"          
 [47] "actiondetails_3_eventcategory"    "actiondetails_3_eventaction"     
 [49] "actiondetails_3_pageviewposition" "actiondetails_3_timestamp"       
 [51] "actiondetails_3_title"            "actiondetails_3_subtitle"        
 [53] "actiondetails_4_type"             "actiondetails_4_url"             
 [55] "actiondetails_4_pageidaction"     "actiondetails_4_idpageview"      
 [57] "actiondetails_4_servertimepretty" "actiondetails_4_pageid"          
 [59] "actiondetails_4_timespent"        "actiondetails_4_timespentpretty" 
 [61] "actiondetails_4_pageviewposition" "actiondetails_4_title"           
 [63] "actiondetails_4_subtitle"         "actiondetails_4_timestamp"       
 [65] "actiondetails_5_type"             "actiondetails_5_url"             
 [67] "actiondetails_5_pageidaction"     "actiondetails_5_idpageview"      
 [69] "actiondetails_5_servertimepretty" "actiondetails_5_pageid"          
 [71] "actiondetails_5_eventcategory"    "actiondetails_5_eventaction"     
 [73] "actiondetails_5_pageviewposition" "actiondetails_5_timestamp"       
 [75] "actiondetails_5_title"            "actiondetails_5_subtitle"        
 [77] "actiondetails_6_type"             "actiondetails_6_url"             
 [79] "actiondetails_6_pageidaction"     "actiondetails_6_idpageview"      
 [81] "actiondetails_6_servertimepretty" "actiondetails_6_pageid"          
 [83] "actiondetails_6_eventcategory"    "actiondetails_6_eventaction"     
 [85] "actiondetails_6_pageviewposition" "actiondetails_6_timestamp"       
 [87] "actiondetails_6_title"            "actiondetails_6_subtitle"        
 [89] "actiondetails_7_type"             "actiondetails_7_url"             
 [91] "actiondetails_7_pageidaction"     "actiondetails_7_idpageview"      
 [93] "actiondetails_7_servertimepretty" "actiondetails_7_pageid"          
 [95] "actiondetails_7_timespent"        "actiondetails_7_timespentpretty" 
 [97] "actiondetails_7_pageviewposition" "actiondetails_7_title"           
 [99] "actiondetails_7_subtitle"         "actiondetails_7_timestamp"       

1.2.4 Werte der ersten 100 Spalten

Show the code
# Compact column-wise listing (type and first values) of the preview.
glimpse(data_all_fct_head100)
Rows: 100
Columns: 100
$ file_id                          <fct> matomo_export_2023-10-04.csv, matomo_…
$ idvisit                          <fct> 6, 5, 4, 2, 3, 1, 25, 26, 24, 22, 23,…
$ visitip                          <fct> 141.75.152.0, 141.75.168.0, 141.75.15…
$ visitorid                        <fct> de3a10060112d977, 92f090e11fdbefbe, d…
$ fingerprint                      <fct> 379a01acad44b5c7, fb231a0ec5c2f0c8, 3…
$ actiondetails_0_type             <fct> event, action, action, action, action…
$ actiondetails_0_url              <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_0_pageidaction     <fct> 11, 122, 6, 6, 2, 2, 252, 254, 254, 2…
$ actiondetails_0_idpageview       <fct> KjY5Mu, tv7t1g, OqlJp6, SqdDri, t8YsD…
$ actiondetails_0_servertimepretty <fct> "Oct 4, 2023 20:27:32", "Oct 4, 2023 …
$ actiondetails_0_pageid           <fct> 483, 308, 305, 92, 222, 1, 972, 973, …
$ actiondetails_0_eventcategory    <fct> click_button, NA, NA, NA, NA, NA, cli…
$ actiondetails_0_eventaction      <fct> Kanäle, NA, NA, NA, NA, NA, Kanäle, N…
$ actiondetails_0_timestamp        <fct> 2023-10-04 20:27:32, 2023-10-04 19:45…
$ actiondetails_0_title            <fct> Event, HAnS, HAnS, HAnS, HAnS, HAnS, …
$ actiondetails_0_subtitle         <fct> "Category: \"\"click_button', Action:…
$ actiondetails_1_type             <fct> action, event, event, event, NA, even…
$ actiondetails_1_url              <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_1_pageidaction     <fct> 6, 123, 7, 7, NA, 3, 251, NA, 256, 2,…
$ actiondetails_1_idpageview       <fct> awdDAu, tv7t1g, OqlJp6, SqdDri, NA, J…
$ actiondetails_1_servertimepretty <fct> "Oct 4, 2023 20:27:33", "Oct 4, 2023 …
$ actiondetails_1_pageid           <fct> 484, 309, 306, 93, NA, 2, 974, NA, 97…
$ actiondetails_1_timespent        <fct> 22, NA, NA, NA, NA, NA, 23, NA, NA, 6…
$ actiondetails_1_timespentpretty  <fct> 22s, NA, NA, NA, NA, NA, 23s, NA, NA,…
$ actiondetails_1_pageviewposition <fct> 1, 1, 1, 1, NA, 1, 1, NA, 1, 2, 1, NA…
$ actiondetails_1_title            <fct> HAnS, Event, Event, Event, NA, Event,…
$ actiondetails_1_subtitle         <fct> "https://hans.th-nuernberg.de/?evalId…
$ actiondetails_1_timestamp        <fct> 2023-10-04 20:27:33, 2023-10-04 19:45…
$ actiondetails_2_type             <fct> event, event, action, action, NA, act…
$ actiondetails_2_url              <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_2_pageidaction     <fct> 11, 123, 10, 10, NA, 6, 256, NA, 251,…
$ actiondetails_2_idpageview       <fct> KjY5Mu, tv7t1g, KjY5Mu, rU2DPV, NA, h…
$ actiondetails_2_servertimepretty <fct> "Oct 4, 2023 20:27:33", "Oct 4, 2023 …
$ actiondetails_2_pageid           <fct> 485, 310, 307, 94, NA, 3, 975, NA, 97…
$ actiondetails_2_eventcategory    <fct> click_button, click_button, NA, NA, N…
$ actiondetails_2_eventaction      <fct> Medien, Abmelden, NA, NA, NA, NA, Med…
$ actiondetails_2_pageviewposition <fct> 1, 1, 2, 2, NA, 2, NA, NA, 2, 3, 1, N…
$ actiondetails_2_timestamp        <fct> 2023-10-04 20:27:33, 2023-10-04 19:45…
$ actiondetails_2_title            <fct> Event, Event, HAnS, HAnS, NA, HAnS, E…
$ actiondetails_2_subtitle         <fct> "Category: \"\"click_button', Action:…
$ actiondetails_3_type             <fct> event, action, NA, event, NA, event, …
$ actiondetails_3_url              <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_3_pageidaction     <fct> 7, 2, NA, 11, NA, 7, 252, NA, NA, NA,…
$ actiondetails_3_idpageview       <fct> awdDAu, UTm1cZ, NA, rU2DPV, NA, hZ9df…
$ actiondetails_3_servertimepretty <fct> "Oct 4, 2023 20:27:44", "Oct 4, 2023 …
$ actiondetails_3_pageid           <fct> 486, 311, NA, 95, NA, 4, 976, NA, NA,…
$ actiondetails_3_eventcategory    <fct> click_button, NA, NA, click_channelca…
$ actiondetails_3_eventaction      <fct> Kanäle, NA, NA, GDI, NA, Kanäle, Kanä…
$ actiondetails_3_pageviewposition <fct> 1, 2, NA, 2, NA, 2, 1, NA, NA, 4, 2, …
$ actiondetails_3_timestamp        <fct> 2023-10-04 20:27:44, 2023-10-04 19:45…
$ actiondetails_3_title            <fct> Event, HAnS, NA, Event, NA, Event, Ev…
$ actiondetails_3_subtitle         <fct> "Category: \"\"click_button', Action:…
$ actiondetails_4_type             <fct> action, event, NA, search, NA, action…
$ actiondetails_4_url              <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_4_pageidaction     <fct> 10, 3, NA, NA, NA, 10, 254, NA, NA, 2…
$ actiondetails_4_idpageview       <fct> ItPeDS, UTm1cZ, NA, oP3co8, NA, YNDCa…
$ actiondetails_4_servertimepretty <fct> "Oct 4, 2023 20:27:44", "Oct 4, 2023 …
$ actiondetails_4_pageid           <fct> 487, 312, NA, 96, NA, 5, 977, NA, NA,…
$ actiondetails_4_timespent        <fct> 1275, NA, NA, NA, NA, 174, 1433, NA, …
$ actiondetails_4_timespentpretty  <fct> 21 min 15s, NA, NA, NA, NA, 2 min 54s…
$ actiondetails_4_pageviewposition <fct> 2, 2, NA, 3, NA, 3, 2, NA, NA, 4, 3, …
$ actiondetails_4_title            <fct> HAnS, Event, NA, Site Search, NA, HAn…
$ actiondetails_4_subtitle         <fct> "https://hans.th-nuernberg.de/channel…
$ actiondetails_4_timestamp        <fct> 2023-10-04 20:27:44, 2023-10-04 19:47…
$ actiondetails_5_type             <fct> event, action, NA, action, NA, event,…
$ actiondetails_5_url              <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_5_pageidaction     <fct> 11, 6, NA, 16, NA, 11, 254, NA, NA, 2…
$ actiondetails_5_idpageview       <fct> ItPeDS, kvsynp, NA, oP3co8, NA, YNDCa…
$ actiondetails_5_servertimepretty <fct> "Oct 4, 2023 20:48:58", "Oct 4, 2023 …
$ actiondetails_5_pageid           <fct> 563, 313, NA, 97, NA, 6, 978, NA, NA,…
$ actiondetails_5_eventcategory    <fct> click_button, NA, NA, NA, NA, click_b…
$ actiondetails_5_eventaction      <fct> Kanäle, NA, NA, NA, NA, Medien, NA, N…
$ actiondetails_5_pageviewposition <fct> 2, 3, NA, 3, NA, 3, 3, NA, NA, 6, 3, …
$ actiondetails_5_timestamp        <fct> 2023-10-04 20:48:58, 2023-10-04 19:47…
$ actiondetails_5_title            <fct> Event, HAnS, NA, HAnS, NA, Event, HAn…
$ actiondetails_5_subtitle         <fct> "Category: \"\"click_button', Action:…
$ actiondetails_6_type             <fct> event, search, NA, event, NA, action,…
$ actiondetails_6_url              <fct> https://hans.th-nuernberg.de/channels…
$ actiondetails_6_pageidaction     <fct> 11, NA, NA, 17, NA, 6, 256, NA, NA, N…
$ actiondetails_6_idpageview       <fct> ItPeDS, rM5GmP, NA, oP3co8, NA, IT1VG…
$ actiondetails_6_servertimepretty <fct> "Oct 4, 2023 20:48:59", "Oct 4, 2023 …
$ actiondetails_6_pageid           <fct> 564, 314, NA, 98, NA, 7, 979, NA, NA,…
$ actiondetails_6_eventcategory    <fct> click_button, NA, NA, click_videocard…
$ actiondetails_6_eventaction      <fct> Medien, NA, NA, Kapitel9-5-Maschinens…
$ actiondetails_6_pageviewposition <fct> 2, 4, NA, 4, NA, 4, 3, NA, NA, 7, 3, …
$ actiondetails_6_timestamp        <fct> 2023-10-04 20:48:59, 2023-10-04 19:47…
$ actiondetails_6_title            <fct> Event, Site Search, NA, Event, NA, HA…
$ actiondetails_6_subtitle         <fct> "Category: \"\"click_button', Action:…
$ actiondetails_7_type             <fct> action, action, NA, action, NA, event…
$ actiondetails_7_url              <fct> https://hans.th-nuernberg.de/?evalId=…
$ actiondetails_7_pageidaction     <fct> 6, 16, NA, 58, NA, 7, 255, NA, NA, NA…
$ actiondetails_7_idpageview       <fct> Zaut9i, rM5GmP, NA, iyzwzi, NA, IT1VG…
$ actiondetails_7_servertimepretty <fct> "Oct 4, 2023 20:48:59", "Oct 4, 2023 …
$ actiondetails_7_pageid           <fct> 565, 315, NA, 99, NA, 8, 980, NA, NA,…
$ actiondetails_7_timespent        <fct> 1, 22, NA, 16, NA, NA, 112, NA, NA, N…
$ actiondetails_7_timespentpretty  <fct> 1s, 22s, NA, 16s, NA, NA, 1 min 52s, …
$ actiondetails_7_pageviewposition <fct> 3, 4, NA, 5, NA, 4, 4, NA, NA, 8, 3, …
$ actiondetails_7_title            <fct> HAnS, HAnS, NA, HAnS, NA, Event, HAnS…
$ actiondetails_7_subtitle         <fct> "https://hans.th-nuernberg.de/?evalId…
$ actiondetails_7_timestamp        <fct> 2023-10-04 20:48:59, 2023-10-04 19:47…

1.2.5 Datensatz data_slim, Zeilen 1-100

Show the code
# Load the long-format action table from the targets store and render its
# first 100 rows as a gt table.
tar_load(data_slim)

data_slim |>
  slice_head(n = 100) |>
  gt()
nr type value idvisit
0 type action 1
0 url https://hans.th-nuernberg.de/login?evalId=none&role=undefined 1
0 timestamp 2023-10-04 16:19:46 1
0 title HAnS 1
0 subtitle https://hans.th-nuernberg.de/login?evalId=none&role=undefined 1
0 pageloadtime 0.18s 1
0 pageloadtimemilliseconds 175 1
1 type event 1
1 url https://hans.th-nuernberg.de/login?evalId=none&role=undefined 1
1 title Event 1
1 subtitle Category: ""login', Action: ""success"" 1
1 timestamp 2023-10-04 16:19:54 1
1 eventcategory login 1
1 eventaction success 1
2 type action 1
2 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
2 timestamp 2023-10-04 16:19:54 1
2 title HAnS 1
2 subtitle https://hans.th-nuernberg.de/?evalId=none&role=developer 1
3 type event 1
3 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
3 eventcategory click_button 1
3 eventaction Kanäle 1
3 timestamp 2023-10-04 16:19:56 1
3 title Event 1
3 subtitle Category: ""click_button', Action: ""Kanäle"" 1
4 type action 1
4 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
4 title HAnS 1
4 subtitle https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
4 timestamp 2023-10-04 16:19:56 1
5 type event 1
5 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
5 eventcategory click_button 1
5 eventaction Medien 1
5 timestamp 2023-10-04 16:21:23 1
5 title Event 1
5 subtitle Category: ""click_button', Action: ""Medien"" 1
6 type action 1
6 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
6 timestamp 2023-10-04 16:21:23 1
6 title HAnS 1
6 subtitle https://hans.th-nuernberg.de/?evalId=none&role=developer 1
7 type event 1
7 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
7 title Event 1
7 subtitle Category: ""click_button', Action: ""Medien"" 1
7 timestamp 2023-10-04 16:25:22 1
7 eventcategory click_button 1
7 eventaction Medien 1
8 type event 1
8 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
8 title Event 1
8 subtitle Category: ""click_button', Action: ""Kanäle"" 1
8 timestamp 2023-10-04 16:25:23 1
8 eventcategory click_button 1
8 eventaction Kanäle 1
9 type action 1
9 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
9 timestamp 2023-10-04 16:25:23 1
9 title HAnS 1
9 subtitle https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
10 type action 1
10 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
10 title HAnS 1
10 subtitle https://hans.th-nuernberg.de/?evalId=none&role=developer 1
10 timestamp 2023-10-04 16:25:24 1
11 type event 1
11 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
11 eventcategory click_button 1
11 eventaction Medien 1
11 timestamp 2023-10-04 16:25:24 1
11 title Event 1
11 subtitle Category: ""click_button', Action: ""Medien"" 1
12 type action 1
12 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
12 title HAnS 1
12 subtitle https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
12 timestamp 2023-10-04 16:25:26 1
13 type event 1
13 url https://hans.th-nuernberg.de/?evalId=none&role=developer 1
13 title Event 1
13 subtitle Category: ""click_button', Action: ""Kanäle"" 1
13 timestamp 2023-10-04 16:25:26 1
13 eventcategory click_button 1
13 eventaction Kanäle 1
14 type event 1
14 url https://hans.th-nuernberg.de/channels?evalId=none&role=developer 1
14 eventcategory click_channelcard 1
14 eventaction GESOA 1
14 timestamp 2023-10-04 16:25:27 1
14 title Event 1
14 subtitle Category: ""click_channelcard', Action: ""GESOA"" 1
15 type search 1
15 title Site Search 1
15 subtitle GESOA 1
15 timestamp 2023-10-04 16:25:27 1
15 sitesearchkeyword GESOA 1
15 sitesearchcount 0 1
16 type action 1

1.3 Datensatz nur User

Entfernt man Developer, Admins und Lecturers aus dem Roh-Datensatz, so bleiben weniger Zeilen übrig:

Show the code
tar_load(data_users_only)
  • 86 Zeilen
  • 2801 Spalten

1.4 Datensatz mit Anzahl der Aktionen pro User

Show the code
tar_load(count_action)

1.5 Zeitraum

1.5.1 Beginn/Ende der Daten

Show the code
tar_load(config)

Laut config.yaml ist 24-ss das aktuelle Semester.

Show the code
tar_load(time_minmax)
Show the code
# Collapse the per-row bounds into one overall earliest/latest timestamp
# and show the result as a table.
gt(
  summarise(
    time_minmax,
    time_min = min(time_min),
    time_max = max(time_max)
  )
)
time_min time_max
2023-10-04 16:19:46 2023-10-09 22:21:28

Diese Statistik wurde auf Basis des Datenobjekts data_slim berechnet.

1.5.2 Days since last visit

Show the code
# Load the "days since last visit" target from the targets store.
tar_load(time_since_last_visit)

# Convert via character first: if the column is a factor (the raw export is
# stored as factors), as.numeric() alone would return the internal level codes
# (1, 2, ...) instead of the recorded day counts. For character or numeric
# input the extra as.character() step is harmless.
time_since_last_visit <-
  time_since_last_visit |>
  mutate(
    dayssincelastvisit = as.numeric(as.character(dayssincelastvisit))
  )

# Descriptive statistics of the converted column, rendered as a table.
time_since_last_visit |>
  datawizard::describe_distribution(dayssincelastvisit) |>
  knitr::kable()
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
dayssincelastvisit 1.0125 0.1114513 0 1 2 8.858956 77.44913 160 0
Show the code
# Density curve of the days since the previous visit.
ggplot(time_since_last_visit, aes(x = dayssincelastvisit)) +
  geom_density()

1.6 Statistiken

Die folgenden Statistiken beruhen auf dem Datensatz data_slim:

Show the code
# Print a compact column-wise overview (types and first values) of data_slim.
glimpse(data_slim)
Rows: 19,958
Columns: 4
$ nr      <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3,…
$ type    <fct> type, url, timestamp, title, subtitle, pageloadtime, pageloadt…
$ value   <chr> "action", "https://hans.th-nuernberg.de/login?evalId=none&role…
$ idvisit <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…

nr fasst die Nummer der Aktion innerhalb eines bestimmten Visits.

1.6.1 Mit allen Daten (den 499er-Daten)

Show the code
# Descriptive statistics of nr_max, shown as a gt table with all numeric
# columns formatted to two decimals.
fmt_number(
  gt(describe_distribution(count_action, nr_max)),
  columns = where(is.numeric),
  decimals = 2
)
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
nr_max 33.94 48.36 46.50 1.00 211.00 1.97 3.46 86.00 0.00

nr_max gibt den Maximalwert von nr zurück, sagt also, wie viele Aktionen maximal von einem Visitor ausgeführt wurden.

Betrachtet man die Anzahl der Aktionen pro Visitor näher, so fällt auf, dass der Maximalwert (499) sehr häufig vorkommt:

Show the code
# Bar chart: number of rows in count_action (n) for each nr_max value.
ggplot(count(count_action, nr_max), aes(x = nr_max, y = n)) +
  geom_col()

Hier noch in einer anderen Darstellung:

Show the code
# Same frequencies as a scatter plot: one point per distinct nr_max value.
ggplot(count(count_action, nr_max), aes(x = nr_max, y = n)) +
  geom_point()

Der Maximalwert ist einfach auffällig häufig:

Show the code
# Tabulate how many rows have nr_max exactly equal to 499.
gt(count(count_action, nr_max == 499))
nr_max == 499 n
FALSE 86

Es erscheint plausibel, dass der Maximalwert alle “gekappten” (zensierten, abgeschnittenen) Werte fasst, also viele Werte, die eigentlich größer wären (aber dann zensiert wurden).

1.6.2 Nur Visitors, für die weniger als 500 Aktionen protokolliert sind

Show the code
# Keep only rows whose nr_max differs from 499 (the value the report treats
# as the cap); rows with a missing nr_max are dropped by filter() as well.
count_action2 <- filter(count_action, nr_max != 499)

# Descriptive statistics of the reduced data, two decimals.
fmt_number(
  gt(describe_distribution(count_action2, nr_max)),
  columns = where(is.numeric),
  decimals = 2
)
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
nr_max 33.94 48.36 46.50 1.00 211.00 1.97 3.46 86.00 0.00

1.7 Verteilung

1.7.1 Mit den 499er-Daten

Show the code
# Mean and SD of nr_max; both are used to annotate the histogram below.
count_action_avg <- mean(count_action$nr_max)
count_action_sd <- sd(count_action$nr_max)

# Histogram of nr_max with a vertical line at the mean and a horizontal bar
# spanning mean - 1 SD to mean + 1 SD along the baseline.
count_action |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(x = "Anzahl von Aktionen pro Visit",
       y = "n",
       caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
  theme_minimal() +
  geom_vline(xintercept = count_action_avg,
             color = palette_okabe_ito()[1]) +
  # annotate() draws the bar once; a geom with constant positions would be
  # repeated for every data row. `linewidth` replaces the deprecated `size`.
  annotate("segment",
           x = count_action_avg - count_action_sd,
           y = 0,
           xend = count_action_avg + count_action_sd,
           yend = 0,
           color = palette_okabe_ito()[2],
           linewidth = 2) +
  # y = Inf pins the label to the top of the panel; a fixed y (formerly 1500)
  # stretches the y-axis whenever the bar heights are much smaller.
  annotate("label", x = count_action_avg, y = Inf, label = "MW",
           vjust = "top") +
  annotate("label", x = count_action_avg + count_action_sd, y = 0, label = "SD")

Show the code
  #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 33.94.
  • SD der Aktionen pro Visit: 48.36.

1.7.2 Ohne 499er-Daten

Show the code
# Mean and SD of nr_max in the reduced data (count_action2); both are used to
# annotate the histogram below.
count_action_avg2 <- mean(count_action2$nr_max)
count_action_sd2 <- sd(count_action2$nr_max)

# Histogram of nr_max with a vertical line at the mean and a horizontal bar
# spanning mean - 1 SD to mean + 1 SD along the baseline.
count_action2 |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(x = "Anzahl von Aktionen pro Visit",
       y = "n",
       title = "Verteilung der User-Aktionen pro Visit",
       caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
  theme_minimal() +
  geom_vline(xintercept = count_action_avg2,
             color = palette_okabe_ito()[1]) +
  # Both ends of the bar use the mean of count_action2; the left end previously
  # used the mean of the unfiltered data (count_action_avg).
  # `linewidth` replaces the deprecated `size` for line geoms.
  annotate("segment",
           x = count_action_avg2 - count_action_sd2,
           y = 0,
           xend = count_action_avg2 + count_action_sd2,
           yend = 0,
           color = palette_okabe_ito()[2],
           linewidth = 2) +
  # y = Inf pins the label to the top of the panel; a fixed y (formerly 1500)
  # stretches the y-axis whenever the bar heights are much smaller.
  annotate("label", x = count_action_avg2, y = Inf, label = "MW",
           vjust = "top") +
  annotate("label", x = count_action_avg2 + count_action_sd2, y = 0,
           label = "SD", vjust = "bottom")

Show the code
  #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 33.94.
  • SD der Aktionen pro Visit: 48.36.

2 Zeit pro Visit

Die Visit-Zeit wurde auf 600 Min. trunkiert/begrenzt.

Show the code
# Load both dwell-time targets from the targets store.
tar_load(time_spent)
tar_load(time_duration)

# Add the dwell time in minutes (t_min) and keep only rows below 600 minutes.
time_spent <- filter(
  mutate(time_spent, t_min = as.numeric(time_diff, units = "mins")),
  t_min < 600
)

2.1 Verweildauer-Statistiken in Sekunden

Show the code
# Dwell-time statistics in seconds.
# time_diff is converted explicitly to seconds first: summarising it as-is
# reports the values in whatever unit the column happens to carry, which is
# not necessarily seconds.
time_spent |>
  mutate(time_diff_sec = as.numeric(time_diff, units = "secs")) |>
  summarise(
    mean_time_diff = mean(time_diff_sec),
    sd_time_diff = sd(time_diff_sec),
    min_time_diff = min(time_diff_sec),
    max_time_diff = max(time_diff_sec)
  ) |>
  # Second pass: average the rows produced by the first pass (one row per
  # group if time_spent is grouped). sd() is NA for single-row groups, hence
  # na.rm = TRUE. Rounding is left to fmt_number() to avoid double rounding.
  summarise(
    mean_time_diff_avg = mean(mean_time_diff),
    sd_time_diff_avg = mean(sd_time_diff, na.rm = TRUE),
    min_time_diff_avg = mean(min_time_diff),
    max_time_diff_avg = mean(max_time_diff)
  ) |>
  gt() |>
  fmt_number(columns = everything(),
             decimals = 2)
mean_time_diff_avg sd_time_diff_avg min_time_diff_avg max_time_diff_avg
169.23 0.00 169.23 169.23
Show the code
# Load the visit-duration target from the targets store.
tar_load(time_duration)

# Mean visit duration in seconds (missing values ignored), plus the same
# value expressed in minutes.
mutate(
  summarise(
    time_duration,
    duration_sec_avg = mean(visitduration_sec, na.rm = TRUE)
  ),
  duration_min_avg = duration_sec_avg / 60
)
  duration_sec_avg duration_min_avg
1         961.3875         16.02313

2.2 Verweildauer-Statistiken in Minuten

Show the code
# First pass: mean, SD, minimum and maximum of the dwell time in minutes.
t_min_stats <- summarise(
  time_spent,
  mean_t_min = mean(t_min),
  sd_t_min = sd(t_min),
  min_t_min = min(t_min),
  max_t_min = max(t_min)
)

# Second pass: average the first-pass rows and show them with two decimals.
t_min_stats |>
  summarise(
    mean_t_min_avg = mean(mean_t_min),
    sd_t_min_avg = mean(sd_t_min, na.rm = TRUE),
    min_t_min_avg = mean(min_t_min),
    max_t_min_avg = mean(max_t_min)
  ) |>
  gt() |>
  fmt_number(columns = everything(), decimals = 2)
mean_t_min_avg sd_t_min_avg min_t_min_avg max_t_min_avg
169.23 0.00 169.23 169.23

2.3 Visualisierung der Verweildauer

2.3.1 bins=20

Show the code
# Histogram of the dwell time in minutes with 20 bins, as the section heading
# announces (the default would be 30 bins).
# No scale_x_time(): t_min is a plain number of minutes, and a time scale
# would interpret those numbers as seconds and label the axis accordingly.
time_spent |>
  ggplot(aes(x = t_min)) +
  geom_histogram(bins = 20) +
  theme_minimal() +
  labs(y = "n",
       x = "Verweildauer in HaNS pro Visit in Minuten")

2.3.2 binwidth = 5 Min.

Show the code
# Histogram of the dwell time in minutes, using 5-minute-wide bins.
ggplot(time_spent, aes(x = t_min)) +
  geom_histogram(binwidth = 5) +
  theme_minimal() +
  labs(
    y = "n",
    x = "Verweildauer in Minuten",
    title = "Verweildauer in HaNS pro Visit",
    caption = "binwidth = 5 Min."
  )

2.3.3 Zeitdauer begrenzt auf 1-120 Min.

Show the code
# Restrict to rows with a dwell time strictly between 1 and 120 minutes.
time_spent2 <-
  time_spent |>
  filter(t_min > 1, t_min < 120)

# Histogram of the restricted dwell times with 10-minute-wide bins.
# The caption now reads "binwidth" (it was misspelled "bindwidth").
time_spent2 |>
  ggplot(aes(x = t_min)) +
  geom_histogram(binwidth = 10) +
  theme_minimal() +
  labs(y = "n",
       x = "Verweildauer in HaNS pro Visit in Minuten",
       title = "Verweildauer begrenzt auf 1-120 Minuten",
       caption = "binwidth = 10 Min.")

3 Was machen die User?

Show the code
tar_load(count_action_type)

3.1 Häufigkeiten

3.1.1 Nach Kategorien

Show the code
# Frequency of each category (most frequent first) with its share of all
# rows, rounded to two decimals.
gt(
  mutate(
    count(count_action_type, category, sort = TRUE),
    prop = round(n / sum(n), 2)
  )
)
category n prop
video 1895 0.63
visit_page 417 0.14
click_slideChange 396 0.13
NA 86 0.03
click_topic 51 0.02
login 49 0.02
Search Results Count 43 0.01
in_media_search 26 0.01
Medien 23 0.01
GESOA 9 0.00
Kanäle 9 0.00
click_channelcard 1 0.00

3.1.2 eventcategory

Was machen die Visitors eigentlich? Und wie oft?

Show the code
# Frequency of each event category value, most frequent first.
event_rows <- filter(data_slim, type == "eventcategory")
gt(count(event_rows, value, sort = TRUE))
value n
videoplayer_click 1719
click_slideChange 396
click_button 50
login 49
click_start_resize 33
click_stop_resize 33
click_videocard 31
click_topic_details 30
click_topic_position_using_image 20
click_transcript_word 20
in_media_search 18
logout 10
click_videocard_search_lecturer 9
in_media_search_results 5
click_channelcard 4
eval 4
click_in_media_search_results 3
click_toggle 2
click_videocard_search_course_acronym 2
click_topic_position_using_link 1
userRole 1

3.2 Verteilung

3.2.1 Rohwerte

Show the code
# Horizontal bar chart of action counts per category, ordered by frequency,
# with the count printed on each bar.
# The x axis carries the counts, so it is labelled as a count (it used to be
# labelled "User-Aktion").
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie"
  ) +
  theme_minimal() +
  scale_x_continuous(labels = scales::comma)

3.2.2 Log-Skalierung

Show the code
# Same bar chart as above on a log10 x scale, so rare categories stay visible
# next to the dominant ones.
# Axis label typo fixed ("Anazhl" -> "Anzahl").
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie",
    caption = "Log10-Skala"
  ) +
  theme_minimal() +
  scale_x_log10()

4 An welchen Tagen und zu welcher Zeit kommen die User zu HaNS?

4.1 Setup

Show the code
tar_load(time_visit_wday)
Show the code
# Weekday names in display order, Monday first. The same vector serves as the
# lookup table and as the factor levels below.
# NOTE(review): the lookup assumes `dow` is coded 1 = Monday ... 7 = Sunday;
# confirm against the code that creates `dow` (a Sunday-first coding would
# shift every label by one day).
days_of_week <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")

# Add `dow2`: the weekday name looked up by position from `dow`, stored as a
# factor whose level order follows `days_of_week`.
time_visit_wday$dow2 <- factor(days_of_week[time_visit_wday$dow],
                               levels = days_of_week)

4.2 HaNS-Login nach Uhrzeit

Show the code
# Share of rows per hour of day.
visits_per_hour <- time_visit_wday |>
  as_tibble() |>
  count(hour) |>
  mutate(prop = n / sum(n))

# Bar chart of those shares.
ggplot(visits_per_hour, aes(x = hour, y = prop)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "HaNS-Nutzer sind keine Frühaufsteher",
    x = "Uhrzeit",
    y = "Anteil"
  )

Show the code
 # coord_polar()
Show the code
# Share of rows per hour of day, drawn in polar coordinates.
ggplot(
  mutate(count(as_tibble(time_visit_wday), hour), prop = n / sum(n)),
  aes(x = hour, y = prop)
) +
  geom_col() +
  theme_minimal() +
  coord_polar()

4.3 Verteilung der HaNS-Besuche nach Wochentagen

Show the code
# Share of rows per weekday (dow2), shown as a bar chart.
ggplot(
  mutate(count(as_tibble(time_visit_wday), dow2), prop = n / sum(n)),
  aes(x = dow2, y = prop)
) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen",
    x = "Wochentag",
    y = "Anteil"
  )

Show the code
 # coord_polar()
Show the code
# Share of rows per weekday (dow2), drawn in polar coordinates.
ggplot(
  mutate(count(as_tibble(time_visit_wday), dow2), prop = n / sum(n)),
  aes(x = dow2, y = prop)
) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Verteilung der HaNS-Logins nach Wochentagen",
    x = "Wochentag",
    y = "Anteil"
  ) +
  coord_polar()

4.3.1 HaNS-Login nach Wochentagen und Uhrzeit

Show the code
# Hourly distribution within each weekday: counts per (weekday, hour),
# converted to shares that sum to 1 per weekday, one facet per weekday.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n / sum(n)) |>
  # Grouping is only needed for the per-weekday shares.
  ungroup() |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  # The x axis shows the hour of day (it used to be labelled "Wochentag");
  # the weekday is shown in the facet strips.
  labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
       x = "Uhrzeit",
       y = "Anteil")

Show the code
 # coord_polar()
Show the code
# Hourly distribution within each weekday in polar coordinates: counts per
# (weekday, hour), converted to shares that sum to 1 per weekday.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n / sum(n)) |>
  # Grouping is only needed for the per-weekday shares.
  ungroup() |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  # The x axis shows the hour of day (it used to be labelled "Wochentag");
  # the weekday is shown in the facet strips.
  labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
       x = "Uhrzeit",
       y = "Anteil") +
  coord_polar()

4.4 Anzahl der Visits nach Datum (Tagen) und Uhrzeit (bin2d)

Show the code
# Ungrouped copy of the visit times with a calendar date column derived from
# date_time.
time2 <-
  time_visit_wday |>
  ungroup() |>
  mutate(date = as.Date(date_time))

# 2D bin counts by calendar day (x) and hour of day (y).
time2 |>
  ggplot(aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(1, 1)) + # (1 day, 1 hour)
  scale_x_date(date_breaks = "1 month") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c() +
  # With binwidth = 1 on a date axis each x-bin is one day; the caption used
  # to say "one week".
  labs(caption = "Each x-bin maps to one day")

4.5 Anzahl der Visits nach Datum (Wochen) und Uhrzeit (bin2d)

Show the code
# 2D bin counts by week (x, 7-day bins labelled with the %W week number) and
# hour of day (y, 1-hour bins).
ggplot(time2, aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(7, 1)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  scale_fill_viridis_c() +
  theme(legend.position = "bottom") +
  labs(
    x = "Week number in 2023/2024",
    caption = "Each x-bin maps to one week"
  )

4.6 Anzahl der Visits nach Datum (Wochen) und Wochentag (bin2d)

Show the code
# 2D bin counts by week (x) and day of week (y); the x axis is labelled with
# the %W week number and the y axis shows integer breaks 1 to 7.
time2 |> 
  ggplot(aes(x = date, y = dow)) +
  geom_bin2d(binwidth = c(7, 1)) +  # 1 week, 1 day of week
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c()  +
  labs(x = "Week number in 2023/2024",
       caption = "Each x-bin maps to one week",
       y = "Day of Week") +
  scale_y_continuous(breaks = 1:7)

5 KI-Gebrauch

5.1 Interaktion mit dem LLM

5.1.1 Art und Anzahl der Interaktionen mit dem LLM

Show the code
# Event categories whose name contains "llm": frequency and share of all such
# events.
data_slim |>
  filter(type == "eventcategory") |>
  filter(str_detect(value, "llm")) |>
  count(value, sort = TRUE) |>
  # Round the proportion itself; previously round() wrapped sum(n), which is
  # an integer total, so prop was never rounded.
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
value n prop

5.1.2 Anteil Visitors, die mit dem LLM interagieren

Show the code
# Share of visits in which at least one value contains "llm".
data_slim |>
  mutate(has_llm = str_detect(value, "llm")) |>
  group_by(idvisit) |>
  # na.rm = TRUE: str_detect() returns NA for missing values, and any() would
  # otherwise return NA for a visit that has NAs but no match, putting that
  # visit into a separate NA row instead of FALSE.
  summarise(llm_used_during_visit = any(has_llm, na.rm = TRUE)) |>
  count(llm_used_during_visit) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
llm_used_during_visit n prop
FALSE 86 1

5.1.3 … Im Zeitverlauf

Show the code
# Load the per-visit LLM usage flag from the targets store.
tar_load(idvisit_has_llm)

# Counts per month and usage flag; prop is the share within each month,
# rounded to two decimals. The table stays grouped by month so that gt()
# shows one row group per month.
llm_by_month <- ungroup(count(idvisit_has_llm, year_month, uses_llm))

llm_by_month |>
  group_by(year_month) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
uses_llm n prop
2023-10
FALSE 95 1
Show the code
# Monthly share of visits with and without LLM interaction, one line per
# usage flag.
idvisit_has_llm |>
  count(year_month, uses_llm) |>
  ungroup() |>
  group_by(year_month) |>
  mutate(prop = n / sum(n)) |>
  # Grouping is only needed for the per-month shares.
  ungroup() |>
  # `group` is the ggplot2 aesthetic name; the former `groups` is not an
  # aesthetic and was ignored, so the lines relied on a per-layer override.
  ggplot(aes(x = year_month, y = prop, color = uses_llm, group = uses_llm)) +
  geom_point() +
  geom_line() +
  labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anteile)")

Show the code
# Monthly number of visits with and without LLM interaction, one line per
# usage flag.
idvisit_has_llm |>
  count(year_month, uses_llm) |>
  # No per-month computation follows, so the data is simply left ungrouped
  # (the former group_by(year_month) had no effect on the plot).
  ungroup() |>
  # `group` is the ggplot2 aesthetic name; the former `groups` is not an
  # aesthetic and was ignored.
  ggplot(aes(x = year_month, y = n, color = uses_llm, group = uses_llm)) +
  geom_point() +
  geom_line() +
  labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anzahl)")

5.2 Klick auf ein Wort im Transkript

Show the code
tar_load(data_slim)
Show the code
# Among non-empty subtitle values: how many mention "click_transcript_word",
# with the share rounded to two decimals.
subtitle_rows <- filter(
  data_slim,
  type == "subtitle",
  !is.na(value),
  value != ""
)

subtitle_rows |>
  count(click_transcript_word = str_detect(value, "click_transcript_word")) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
click_transcript_word n prop
FALSE 2985 0.99
TRUE 20 0.01

5.3 KI-Aktionen

5.3.1 Insgesamt (ganzer Zeitraum)

Show the code
tar_load(data_long)

5.3.1.1 Im Detail

Show the code
# Frequency of every distinct value in data_long that contains "transcript".
ai_actions_count <- count(
  filter(data_long, str_detect(value, "transcript")),
  value
)

# Show the frequencies as a table.
gt(ai_actions_count)
value n
Category: ""click_transcript_word', Action: ""word: Bei - pos: 346.3 - index: 765"" 1
Category: ""click_transcript_word', Action: ""word: Diesen - pos: 453.9 - index: 1009"" 1
Category: ""click_transcript_word', Action: ""word: Fall - pos: 540.22 - index: 1233"" 1
Category: ""click_transcript_word', Action: ""word: Ja, - pos: 426.74 - index: 944"" 1
Category: ""click_transcript_word', Action: ""word: Mayer - pos: 1140.58 - index: 2628"" 2
Category: ""click_transcript_word', Action: ""word: Professionen - pos: 356.74 - index: 797"" 1
Category: ""click_transcript_word', Action: ""word: Promotionsrecht, - pos: 823.28 - index: 1864"" 1
Category: ""click_transcript_word', Action: ""word: Schauen - pos: 498.2 - index: 1124"" 1
Category: ""click_transcript_word', Action: ""word: Und - pos: 979.54 - index: 2229"" 1
Category: ""click_transcript_word', Action: ""word: Wenn - pos: 330.04 - index: 731"" 1
Category: ""click_transcript_word', Action: ""word: Wir - pos: 659.98 - index: 1501"" 1
Category: ""click_transcript_word', Action: ""word: die - pos: 1731.44 - index: 3895"" 1
Category: ""click_transcript_word', Action: ""word: gibt - pos: 473.12 - index: 1064"" 1
Category: ""click_transcript_word', Action: ""word: hat, - pos: 382.66 - index: 850"" 1
Category: ""click_transcript_word', Action: ""word: keinerlei - pos: 1740.08 - index: 3917"" 1
Category: ""click_transcript_word', Action: ""word: kleinen - pos: 972.18 - index: 2213"" 1
Category: ""click_transcript_word', Action: ""word: macht - pos: 126.54 - index: 260"" 1
Category: ""click_transcript_word', Action: ""word: sind - pos: 1098.82 - index: 2536"" 1
Category: ""click_transcript_word', Action: ""word: wissenschaftlichen - pos: 415.98 - index: 920"" 1
click_transcript_word 20

5.3.1.2 Zusammengefasst nach “click transcript word”

Show the code
# Collapse every value that contains "click_transcript_word" into the single
# label "click transcript word" and tabulate the labels.
# NOTE(review): ai_actions_count is already a frequency table, and count()
# here tallies its rows (distinct values), not the sum of its `n` column.
# Verify that this is intended; `count(value, wt = n)` would sum occurrences.
ai_actions_count |> 
  mutate(value = case_when(
    str_detect(value, "click_transcript_word.*") ~ "click transcript word",
    TRUE ~ value
  )) |> 
  count(value, sort = TRUE) |> 
  gt()
value n
click transcript word 20

5.3.2 KI-Klicks pro Monat

Show the code
tar_load(ai_transcript_clicks_per_month)
Show the code
# Counts per month and transcript-click flag; prop is the share within each
# month, rounded to two decimals. The table stays grouped by month so that
# gt() shows one row group per month.
clicks_by_month <- ungroup(
  count(ai_transcript_clicks_per_month, year_month, clicks_transcript_any)
)

clicks_by_month |>
  group_by(year_month) |>
  mutate(prop = round(n / sum(n), 2)) |>
  gt()
clicks_transcript_any n prop
2023-10
FALSE 87 0.92
TRUE 8 0.08